# coding=utf8
import json
import time
from pprint import pprint

from bson import ObjectId

from libs.sele import driver_new_session
from urllib.parse import quote
from bs4 import BeautifulSoup
from models.weibo import model_weibo


def login(driver, login_name='15162434911', password='940923zhyc'):
    """Open the Weibo mobile sign-in page and submit the login form.

    The form fields are filled in and the login button is clicked through
    injected JavaScript. The function returns as soon as the click has been
    dispatched; it neither waits for nor verifies a successful login.

    Args:
        driver: Browser driver exposing ``get(url)`` and
            ``execute_script(js)``.
        login_name: Value written into the ``loginName`` input.
        password: Value written into the ``loginPassword`` input.
    """
    # NOTE(review): the default credentials are kept only so that existing
    # ``login(driver)`` callers behave as before; they should be moved out
    # of the source tree (environment variable / config file).
    driver.get("https://passport.weibo.cn/signin/login")

    # json.dumps produces a correctly quoted and escaped JavaScript string
    # literal, so quotes or backslashes in the values cannot break the script.
    script = (
        "document.getElementById('loginName').value={name};"
        "document.getElementById('loginPassword').value={pwd};"
        "document.getElementById('loginAction').click();"
    ).format(name=json.dumps(login_name), pwd=json.dumps(password))
    driver.execute_script(script)


def search_m_weibo(driver, keyword):
    """Scrape keyword search results from the rendered m.weibo.cn page.

    Loads the search page for ``keyword``, stores the cards of the second
    ``.card-list`` element, then repeatedly scrolls to the bottom of the page
    and stores the cards of each further ``.card-list`` element. One document
    per card is inserted through ``model_weibo.insert_one``; every document
    written by one call shares the same ``Pid``.

    The crawl ends when the page no longer contains an element with class
    ``loading`` or when no further ``.card-list`` has appeared after a scroll.

    Args:
        driver: Selenium-style browser driver.
        keyword: Search term; it is URL-quoted before being put into the URL.
    """
    driver.get(
        "http://m.weibo.cn/main/pages/index?containerid=100103type%3D1%26q%3D{keyword}&type=all".format(
            keyword=quote(keyword))
    )

    # Remove every child of the first .card-list except the last one.
    driver.execute_script("var cards=document.querySelector('.card-list');"
                          "var cards_children=cards.children;"
                          "while(cards_children.length!=1){"
                          "cards.removeChild(cards_children[0])"
                          "}")

    pid = ObjectId()  # single id shared by all documents of this search

    def get_elem(elem):
        """Insert one document for every direct ``div`` child of ``elem``."""
        for card in elem.find_elements_by_xpath('./div'):
            footer_html = card.find_element_by_tag_name('footer').get_attribute('innerHTML')
            # One number per footer link; link text that is not purely
            # digits is recorded as 0.
            link_texts = [a.get_text() for a in BeautifulSoup(footer_html, 'lxml').find_all('a')]
            repost_info = [int(text) if text.isdigit() else 0 for text in link_texts]
            weibo_info = {
                'Pid': pid,
                'Keyword': keyword,
                'WeiboDetailHref': card.get_attribute('data-jump'),
                'UserHref': card.find_element_by_tag_name('header').find_element_by_tag_name('a').get_attribute(
                    'href'),
                'Weibo': card.find_element_by_tag_name('section').get_attribute('innerHTML'),
                'Repost': repost_info
            }
            model_weibo.insert_one(weibo_info)

    cards = driver.find_elements_by_css_selector('.card-list')
    if len(cards) < 2:
        # The result list is expected at index 1; without it there is
        # nothing to store, so stop instead of raising IndexError.
        print("没有搜索结果列表")
        return
    get_elem(cards[1])
    # Scrolling to the bottom makes the page load the next batch.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    i = 1
    while True:
        i += 1
        print("page:" + str(i))
        time.sleep(3)  # throttle and give the next batch time to load
        try:
            driver.find_element_by_class_name('loading')
        except Exception:
            # A failed lookup means the loading indicator is gone. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            print("没有loading了")
            break
        cards = driver.find_elements_by_css_selector('.card-list')
        if i >= len(cards):
            # The indicator is still present but no new list was appended
            # within the wait; end the crawl rather than raise IndexError.
            print("没有新的结果列表了")
            break
        get_elem(cards[i])
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")


def search_m_weibo_json(driver, keyword):
    """Scrape keyword search results through the m.weibo.cn JSON endpoint.

    Requests result pages one after another through the browser driver,
    parses the JSON shown in the page's ``<pre>`` element and inserts one
    document per card through ``model_weibo.insert_one``. Every document
    written by one call shares the same ``Pid``.

    The crawl ends after page 100, or once an empty ``cards`` payload has
    been seen more than five times in total (the counter is never reset).

    Args:
        driver: Selenium-style browser driver.
        keyword: Search term; it is URL-quoted before being put into the URL.

    Raises:
        KeyError, IndexError: If a response does not have the expected
            layout (``card_group``, ``mblog``, ``scheme``, ...).
    """
    max_page = 100        # last result page that is requested
    max_empty_checks = 5  # empty responses tolerated before giving up
    page = 1
    check = 0
    pid = ObjectId()
    # The page cap is tested before fetching, so no request is spent on a
    # page whose result would be discarded.
    while page <= max_page:
        # NOTE(review): the leading empty "containerid=" duplicates the
        # parameter; kept as is - confirm whether the endpoint needs it.
        surl = "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D{keyword}&type=all&page={page}".format(
            keyword=quote(keyword), page=page)
        print(surl)
        driver.get(surl)
        # NOTE(review): innerHTML is the serialized markup of the <pre>, so
        # characters such as "&" and "<" may arrive entity-escaped - confirm
        # whether textContent should be read instead.
        re_data = driver.find_element_by_tag_name('pre').get_attribute('innerHTML')
        re_json = json.loads(re_data)

        # Covers a missing key, null and an empty list alike; calling len()
        # before the None test would raise TypeError on a null payload.
        cards = re_json.get('cards')
        if not cards:
            if check > max_empty_checks:
                break
            print("稍等片刻,重新请求")
            time.sleep(10)
            check += 1
            continue  # retry the same page

        # The first page keeps its result group at index 2, later pages at
        # index 0 (presumably page 1 carries extra leading cards - TODO confirm).
        group_index = 2 if page == 1 else 0
        card_list = cards[group_index]['card_group']

        for card in card_list:
            mblog = card['mblog']
            weibo_info = {
                'Pid': pid,
                'Keyword': keyword,
                'Scheme': card['scheme'],
                'MBlog': {
                    'Text': mblog['text'],
                    'Source': mblog['source'],
                    'CreatedTimestamp': mblog['created_timestamp'],
                    # Optional fields fall back to neutral defaults.
                    'IsLongText': mblog.get('isLongText', False),
                    'RepostCount': mblog.get('reposts_count', 0),
                    'CommentsCount': mblog.get('comments_count', 0),
                    'AttitudesCount': mblog.get('attitudes_count', 0)
                },

            }
            model_weibo.insert_one(weibo_info)
        page += 1
        time.sleep(3)  # throttle between result pages


if __name__ == '__main__':
    # Script entry point: log in once, then crawl the keyword list through
    # the JSON endpoint.
    driver = driver_new_session()
    try:
        login(driver)
        # Printed unconditionally; the login result itself is not checked.
        print("登录成功.....请稍等")
        time.sleep(10)  # give the login request time to complete
        keyword_list = ["壮阳", "壮阳食物", "补肾", "前列腺", "壮阳药", "伟哥", "玛咖", "玛卡", "野燕麦", "牡蛎", "生蚝", "东革阿里"]
        for keyword in keyword_list:
            print("爬取:" + keyword)
            search_m_weibo_json(driver, keyword)
            # NOTE(review): this break stops the run after the first keyword;
            # it looks like a debugging leftover - confirm before removing.
            break
    finally:
        # Always release the browser, even when login or scraping raised.
        driver.close()
